rm(list=ls())

library(pracma)
library(foreign)
library(ggplot2)
library(MASS)
library(xtable)
library(apsrtable)
library(arm)
library(lmtest)
library(nnet)

setwd("'C:/Users/Katie/Google Drive/Duke documents/Spring 2015/MLE/Replication")
load("C:/Users/Katie/Google Drive/Duke documents/Spring 2015/MLE/Replication/LacinaAJPSReplication.RData")

indiadata = x
dim(indiadata)

#renaming data columns so easier to understand
library(plyr)


names(indiadata)[6] = "violence"
names(indiadata)[7] = "peacestate"
names(indiadata)[8] = "statehood"
names(indiadata)[9] = "all"
names(indiadata)[11] = "LNRelINCRep"
names(indiadata)[12] = "lnRelINCRep2"
names(indiadata)[13] = "DemPol"
names(indiadata)[14] = "CultPol"
names(indiadata)[15] = "lnenclINCrep"
names(indiadata)[16] = "LNenclpop"
names(indiadata)[17] = "Aglabor"
names(indiadata)[18] = "landless"
names(indiadata)[19] = "Hindu"
names(indiadata)[20] = "lnkmCAP"

#creating interaction effect variable (violence *ln rel inc rep)


#logistic  regression for table 6
dv = 'statehood'

indiadata$interaction = indiadata$violence *indiadata$LNRelINCRep

ivs = c('interaction', 'violence', 'LNRelINCRep', 'DemPol', 
        'lnenclINCrep', 'LNenclpop', 'Hindu', 'lnkmCAP')
logitform = formula(paste0( dv, ' ~ ', paste(ivs, collapse=' + ') ))

mlogit1 = glm(logitform, data=indiadata, family=binomial(link="logit"))
summary(mlogit1)

#clustering standard errors
cl   <- function(dat,fm, cluster){
  require(sandwich, quietly = TRUE)
  require(lmtest, quietly = TRUE)
  M <- length(unique(cluster))
  N <- length(cluster)
  K <- fm$rank
  dfc <- (M/(M-1))*((N-1)/(N-K))
  uj  <- apply(estfun(fm),2, function(x) tapply(x, cluster, sum));
  vcovCL <- dfc*sandwich(fm, meat=crossprod(uj)/N)
  coeftest(fm, vcovCL) }


require(foreign)
mlogit1cl = cl(indiadata, mlogit1, indiadata$state) #logit model, clustered SE, equiv to mod 3 in Lacina

#base model performance
names = c("Intercept", "Viol*Rel INC Rep", "Violence",
          "Rel. INC rep", "Dem. polarization", 
          "Absolute level INC rep", "Enclave pop.", "Hindu pop.", 
          "Dist. to Capital")
coefplot(mlogit1, xlim=c(-65,45))

coefplot(mlogit1cl[,1], mlogit1cl[,1],  varnames=names, intercept = TRUE, xlim=c(-65,45),
         main = "Baseline Model: Does a Linguistic Group Attain Statehood?")

#zooming in on key variables
coefplot(mlogit1cl[1:4,1], mlogit1cl[2:4,1],  varnames=names, intercept = TRUE, xlim=c(-4,6),
         main = "Baseline Model: Key Variables")

######logistic reg for table 6- model 4- statehood (cult pol instead of dem pol)#################
ivs4 = c('interaction', 'violence', 'LNRelINCRep', 'CultPol', 
        'lnenclINCrep', 'LNenclpop', 'Hindu', 'lnkmCAP')
logitform4 = formula(paste0( dv, ' ~ ', paste(ivs4, collapse=' + ') ))
mlogit4 =  glm(logitform4, data=indiadata, family=binomial(link="logit"))
summary(mlogit4)
mlogit4cl = cl(indiadata, mlogit4, indiadata$state)


########### trying multinomial logit model ##############
dvMulti = "all"
ivsMulti = c('LNRelINCRep', 'lnRelINCRep2', 'DemPol', 
        'lnenclINCrep', 'LNenclpop', 'Aglabor', 'landless',
        'Hindu', 'lnkmCAP')
multiForm=formula(paste0( dvMulti, ' ~ ', paste(ivsMulti, collapse=' + ') ))
multi = multinom(multiForm, data=indiadata)
summary(multi)



######### running performance stuff on initial logit model (corresponds to mod 3 in Lacina)##############
###########################################################
#10-fold cross-validation

k=6
set.seed(6886)
indiadata$rand = sample(1:k, nrow(indiadata), replace=TRUE)
models = list(list())
colors = c("pink","red","orange","yellow","green","dark green","light blue","blue","purple")
spacing = c(.1, .2, .3, .4, .5, .6)
names = c("Intercept", "Viol*Rel INC Rep", "Violence",
          "Rel. INC rep", "Dem. polarization", 
          "Absolute level INC rep", "Enclave pop.", "Hindu pop.", 
          "Dist. to Capital")
numbers = seq(1:6)
coefplot(mlogit1cl[,1], mlogit1cl[,1],  varnames=names, intercept = TRUE, xlim=c(-65,45),
         main = "Coefficient Estimates, Colored by Fold")
AIC = list()
BIC = list()
sepPlot=list()

for(ii in 1:k){
  
  train = indiadata[indiadata$rand !=ii,]  
  test=indiadata[indiadata$rand==ii,] 
  modelnoCL = glm(logitform, data=train, family=binomial(link="logit"))
  
  
  models[[ii]]= cl(train, modelnoCL, train$state)
  model=cl(train, modelnoCL, train$state)
  
  coefplot(model[,1], model[,1], intercept = TRUE, xlim=c(-35,35), col.pts = colors[ii], main = "Coefficient Estimates, Colored by Fold", add=TRUE, offset=spacing[ii])
  
  #generating predicted values
  trainBeta = coef(modelnoCL)
  X = data.matrix( cbind(1, test[, names(trainBeta)[2:length(trainBeta)] ] ) )
  predVals = X %*% trainBeta
  predProbs = 1/( 1 + exp(-predVals) )
  
  # Running some perf statistics (logLikelihood, AIC, BIC)
  logLike = sum(test$statehood * log(predProbs) + (1 - test$statehood)*log(1-predProbs))
  
  # Using this we can calculate the AIC and BIC
  # AIC: -2 * log-likelihood + 2 * npar
  AIC[[ii]] = -2 * logLike + 2 * length(coef(modelnoCL)) # AIC(m2)
  # BIC: -2 * log-likelihood + log(nrow(data)) * npar
  BIC[[ii]] = -2 * logLike + log(nrow(test)) * length(coef(modelnoCL))
  
  #setting up to plot
  
  sepData = data.frame(prob = predProbs, dv = test$statehood)
  sepData = sepData[order(sepData$prob), ]
  col = c("grey94", "midnightblue") 
  
  #graphing
  sepPlot[[ii]] = ggplot(data=sepData, aes(ymin=0, ymax=1, xmin=0, xmax=1:length(dv)))
  # Create a rectangular plot to fill
  sepPlot[[ii]] = sepPlot[[ii]] + geom_rect(fill = "grey94")
  # Add predicted probability line
  sepPlot[[ii]] = sepPlot[[ii]] + geom_line(aes(y=prob, x=1:length(dv)), lwd=.8)
  # Add lines to denote events
  sepPlot[[ii]] = sepPlot[[ii]] + geom_linerange(aes(color=factor(dv), x=1:length(dv)), alpha=.8)
  # Color event lines
  sepPlot[[ii]] = sepPlot[[ii]] + scale_color_manual(values = col)
  sepPlot[[ii]] = sepPlot[[ii]] + scale_y_continuous("", breaks = NULL)
  sepPlot[[ii]] = sepPlot[[ii]] + scale_x_continuous("", breaks = NULL)
  sepPlot[[ii]] = sepPlot[[ii]] + theme(
    axis.ticks = element_blank(),
    legend.position = "none", 
    panel.background = element_blank(), 
    panel.grid = element_blank() )
  title =  paste0("Separation Plot for Fold ", numbers[ii])
  sepPlot[[ii]] = sepPlot[[ii]] + ggtitle(title)
  sepPlot[[ii]]
}

#combined plot. This is a function that combines all of the different sepPlots (or other graphs) 
#into one image, for easier viewing
multiplot <- function(..., plotlist = NULL, file, cols = 1, layout = NULL) {
  require(grid)
  
  plots <- c(list(...), plotlist)
  
  numPlots = length(plots)
  
  if (is.null(layout)) {
    layout <- matrix(seq(1, cols * ceiling(numPlots/cols)),
                     ncol = cols, nrow = ceiling(numPlots/cols))
  }
  
  if (numPlots == 1) {
    print(plots[[1]])
    
  } else {
    grid.newpage()
    pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))
    
    for (i in 1:numPlots) {
      matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))
      
      print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,
                                      layout.pos.col = matchidx$col))
    }
  }
}


combinedSepPlots = multiplot(sepPlot[[1]], sepPlot[[2]], sepPlot[[3]], sepPlot[[4]], sepPlot[[5]], sepPlot[[6]], cols=2)


################################### more performance stuff ############################
#examning whether model better off w/interaction effect or not
summary(mlogit1) # AIC w/interaction term is 67.795
ivsNI = c('violence', 'LNRelINCRep', 'DemPol', 
        'lnenclINCrep', 'LNenclpop', 'Hindu', 'lnkmCAP')
logitformNoInteraction = formula(paste0( dv, ' ~ ', paste(ivsNI, collapse=' + ') ))


#running a model without interaction term
logitModNI = glm(logitformNoInteraction, data=indiadata, family=binomial(link="logit"))
logitmodNIcl = cl(indiadata, logitModNI, indiadata$state) #coeffs change a lot, and now ntohing's sig


#comparing perf diagnostics w/ and w/o interaction term
summary(logitModNI) #AIC w/o interaction term is 67.839
lrtest(mlogit1, logitModNI) #loglikelihood of first is 25, second is 26
anova(mlogit1, logitModNI) #F test is 54 for first, 55 for second (so actually slightly better w/o)

plot(density(indiadata$LNRelINCRep))

#sep plot for model without interaction term to see how it does
#generating predicted values
trainBetaNI = coef(logitModNI)
XnoI = data.matrix( cbind(1, indiadata[, names(trainBetaNI)[2:length(trainBetaNI)] ] ) )
predValsNI = XnoI %*% trainBetaNI
predProbsNI = 1/( 1 + exp(-predValsNI) )


#setting up to plot

sepDataNI = data.frame(prob = predProbsNI, dv = indiadata$statehood)
sepDataNI = sepDataNI[order(sepDataNI$prob), ]
col = c("grey94", 
        "midnightblue") 

#graphing
sepPlotNI = ggplot(data=sepDataNI, aes(ymin=0, ymax=1, xmin=0, xmax=1:length(dv)))
# Create a rectangular plot to fill
sepPlotNI = sepPlotNI + geom_rect(fill = "grey94")
# Add predicted probability line
sepPlotNI = sepPlotNI + geom_line(aes(y=prob, x=1:length(dv)), lwd=.9)
# Add lines to denote events
sepPlotNI = sepPlotNI + geom_linerange(aes(color=factor(dv), x=1:length(dv)), alpha=.9)
# Color event lines
sepPlotNI = sepPlotNI + scale_color_manual(values = col)
sepPlotNI= sepPlotNI + scale_y_continuous("", breaks = c(0, 0.25, 0.5, 0.75, 1.0))
sepPlotNI = sepPlotNI + scale_x_continuous("", breaks = NULL)
sepPlotNI = sepPlotNI + theme(
  axis.ticks = element_blank(),
  legend.position = "none", 
  panel.background = element_blank(), 
  panel.grid = element_blank() )

sepPlotNI = sepPlotNI + ggtitle("Separation Plot: No Interaction Term")
sepPlotNI


#################graphing interaction effects ##################
#######predicting effect representation on statehood
means=apply(indiadata[,ivs], 2, function(x){ mean(x, na.rm=TRUE) })
minMaxSep=quantile(indiadata[,ivs[3]], probs=c(0,1), na.rm=TRUE)

scens=rbind(c(1, means[1:2], minMaxSep[1], means[4:length(means)]),
            c(1, means[1:2], minMaxSep[2], means[4:length(means)]) )

# Simulate additional parameter estimates from multivariate normal
sims=1000
draws = mvrnorm(sims, coef(mlogit1), vcov(mlogit1))

# Get predicted values using matrix multiplication
preds = draws %*% t(scens)

#plotting sim results
plot(density(preds[,1]), col = "black", bty = "n", las = 1, 
     xlim=c(-8, 6), lwd = 2.5, main='', ylim=c(0,1),
     xlab = "Statehood")
lines(density(preds[,2]), col = "black", lwd = 2.5, lty=2)
title("Figure 1: Relative Representation Effects on Statehood")
legend("topleft", title="Marginal Effects of Congressional Representation", c("Low Rep", "High Rep"),
       lty=c(1,2), col=c("black","black"), cex=.8) 

#additional graphs
##first calc partial effect of violence on statehood by Rel Rep
repRange = sort(indiadata$LNRelINCRep)
ViolenceCoef =coef(mlogit1)['violence']
interactcoef = coef(mlogit1)['interaction']
margViolence = ViolenceCoef + interactcoef*repRange #marg effect of muslim

plot(repRange, margViolence) #accounting for income, so it's x-axis, and then marg effects on y

#partial effect of income on democracy by muslim
muslimRange = c(0,1)
inccoef = coef(lm2)['income']
margIncome = inccoef + musinccoef*muslimRange

plot(muslimRange, margIncome)


####################################################### INDIA GRAPHS#####################################
library(raster)
library(rgdal)
library(rgeos)
library(ggplot2)
library(dplyr)
library(mapproj)

### Get data
india <- getData("GADM", country = "India", level = 1)
indiaDistrict <- getData("GADM", country = "India", level = 2)
adminNames = c(unique(indiaDistrict$NAME_2), unique(india$NAME_1))
ajmer = subset(indiaDistrict, NAME_2 == "Ajmer")
test = ifelse(adminNames == indiadata$state, 1, 0)

#NOTE TO SELF: REMAKE INDIA GRAPH, COLOR IF DISTRICTS OR PROVINCES ARE IN LACINAS ########
#nice india graph
map <- fortify(india)
map$id <- as.integer(map$id)

dat <- data.frame(id = 1:(length(india@data$NAME_1)), state = india@data$NAME_1)
map_df <- inner_join(map, dat, by = "id")

centers <- data.frame(gCentroid(india, byid = TRUE))
centers$state <- dat$state


### This is hrbrmstr's own function
theme_map <- function (base_size = 12, base_family = "") {
  theme_gray(base_size = base_size, base_family = base_family) %+replace% 
    theme(
      axis.line=element_blank(),
      axis.text.x=element_blank(),
      axis.text.y=element_blank(),
      axis.ticks=element_blank(),
      axis.ticks.length=unit(0.3, "lines"),
      axis.ticks.margin=unit(0.5, "lines"),
      axis.title.x=element_blank(),
      axis.title.y=element_blank(),
      legend.background=element_rect(fill="white", colour=NA),
      legend.key=element_rect(colour="white"),
      legend.key.size=unit(1.5, "lines"),
      legend.position="right",
      legend.text=element_text(size=rel(1.2)),
      legend.title=element_text(size=rel(1.4), face="bold", hjust=0),
      panel.background=element_blank(),
      panel.border=element_blank(),
      panel.grid.major=element_blank(),
      panel.grid.minor=element_blank(),
      panel.margin=unit(0, "lines"),
      plot.background=element_blank(),
      plot.margin=unit(c(1, 1, 0.5, 0.5), "lines"),
      plot.title=element_text(size=rel(1.8), face="bold", hjust=0.5),
      strip.background=element_rect(fill="grey90", colour="grey50"),
      strip.text.x=element_text(size=rel(0.8)),
      strip.text.y=element_text(size=rel(0.8), angle=-90) 
    )   
}

indiaPlot =  ggplot() +
  geom_map(data = map_df, map = map_df,
           aes(map_id = id, x = long, y = lat, group = group),
           color = "white", fill = "antiquewhite2", size = .25) +
  geom_text(data = centers, aes(label = state, x = x, y = y), size = 2.25) +
  coord_map() +
  labs(x = "", y = "") + theme(axis.ticks.y = element_blank(),axis.text.y = element_blank(), # get rid of x ticks/text
                               axis.ticks.x = element_blank(),axis.text.x = element_blank(),
panel.background = element_rect(fill = "slategray3"))
indiaPlot
ggsave(indiaPlot, file="Indiaplot.png", dpi = 600)


#graphing Lacina states
mapDist <- fortify(indiaDistrict)
mapDist$id <- as.integer(mapDist$id)

datDist <- data.frame(id = 1:(length(indiaDistrict@data$NAME_2)), state = indiaDistrict@data$NAME_2)
map_dfDist <- inner_join(mapDist, datDist, by = "id")

centersDist <- data.frame(gCentroid(indiaDistrict, byid = TRUE))
centersDist$state <- datDist$state
indiaPlotDistrict = ggplot() +
  geom_map(data = map_dfDist, map = map_dfDist,
           aes(map_id = id, x = long, y = lat, group = group),
           color = "white", fill = "bisque2", size = .25) +
  geom_text(data = centers, aes(label = state, x = x, y = y), size = 2.5) +
  coord_map() +
  labs(x = "", y = "", title = "Districts of India") + theme(panel.background = element_rect(fill = "slategrey2"))
indiaPlotDistrict

#creating map to color districts that are included in Lacina's
map_df2 <- transform(map_Dist, hue = ifelse(state == "Assam", "a", NA))



#################################################################################
############ Spatial Analysis ###########################################################################

#trying to tell whether or not spatial variables matter. Looking specifically at whether violence in neighboring enclave
# affects current enclave's chance of statehood.
#created adjacency matrix, which looks at 1950 states, 1 if adjacent and 0 otherwise. 25X25 matrix (once names stripped)

adjacency = read.csv("C:/Users/Katie/Google Drive/Duke documents/Spring 2015/MLE/Replication/adjacency.csv")
adjacency = as.matrix(adjacency)

#now create spatially lagged DV. this is proportion of enclaves in a state that gained statehood
stateOutcome = c(1, 0, 0, 1/3, 0, .5, 1, 0, 0, 1, 0, 0, .2, 4/7, 0, .5, 0, 0, 1/3, 0, 1, 0, 0, 0, 0)

#creating spatial variable to actually use in regression
stateSpatial = adjacency %*% stateOutcome
indiadata$spatial = seq(1:63)
indiadata$spatial[1] = 1/3
indiadata$spatial[2:9] = 0
indiadata$spatial[10] = 0.5333333
indiadata$spatial[11:13] = .2
indiadata$spatial[14] = 0
indiadata$spatial[15:18] = 2.604762
indiadata$spatial[19] = 1.071429 
indiadata$spatial[20] = 0
indiadata$spatial[21] = 0
indiadata$spatial[22:24] = 1.271429
indiadata$spatial[25] = 0.8333333
indiadata$spatial[26:27] = 1.033333
indiadata$spatial[28:32] = 2.4047619
indiadata$spatial[33:39] = 4.2
indiadata$spatial[40] = 0
indiadata$spatial[41:42] = 2.0714286
indiadata$spatial[43:45] = 1/3
indiadata$spatial[46:50] = 1/3
indiadata$spatial[51:53] = 1.5
indiadata$spatial[54] = .5
indiadata$spatial[55] = 0.5714286
indiadata$spatial[56] = 0
indiadata$spatial[57:60] = 0.8666667
indiadata$spatial[61] = 1/3
indiadata$spatial[62:63] = .2

spatialivs = c('spatial', 'interaction', 'violence', 'LNRelINCRep', 'DemPol', 
                     'lnenclINCrep', 'LNenclpop', 'Hindu', 'lnkmCAP')
spatialLogitForm = formula(paste0( dv, ' ~ ', paste(spatialivs, collapse=' + ') ))

spatialModel = glm(spatialLogitForm, data=indiadata, family=binomial(link="logit"))
summary(spatialModel)

spatialModelcl = cl(indiadata, spatialModel, indiadata$state)
spatialModelkey = spatialModelcl[2:5,] #extracting only key vars for the coefplot
names2 = c("Intercept", "Spatial", "Viol*Rel INC Rep", "Violence",
          "Rel. INC rep", "Dem. polarization", 
          "Absolute level INC rep", "Enclave pop.", "Hindu pop.", 
          "Dist. to Capital")
spatNames = c("Spatial", "Viol*Rel INC Rep", "Violence",
              "Rel. INC rep")
colorSpat = c("midnightblue", "#CC79A7", "#CC79A7", "#CC79A7")
coefplot(spatialModelkey[,1], spatialModelkey[,2], xlim=c(-2,4), cex.pts=2.5,
         varnames=spatNames, col.pts=colorSpat, main="Spatial Model: Main Coefficients")

#sep plot for base model of spatial analysis
#sep plot for baseline model
#generating predicted values
trainBetaspatial = coef(spatialModel)
Xspatial = data.matrix( cbind(1, indiadata[, names(trainBetaspatial)[2:length(trainBetaspatial)] ] ) )
predValsspatial = Xspatial %*% trainBetaspatial
predProbsspatial = 1/( 1 + exp(-predValsspatial) )


#setting up to plot

sepDataspatial = data.frame(prob = predProbsspatial, dv = indiadata$statehood)
sepDataspatial = sepDataspatial[order(sepDataspatial$prob), ]
col = c("grey94", 
        "midnightblue") 

#graphing
sepPlotBaseSpatial = ggplot(data=sepDataspatial, aes(ymin=0, ymax=1, xmin=0, xmax=1:length(dv)))
# Create a rectangular plot to fill
sepPlotBaseSpatial = sepPlotBaseSpatial + geom_rect(fill = "grey94")
# Add predicted probability line
sepPlotBaseSpatial = sepPlotBaseSpatial + geom_line(aes(y=prob, x=1:length(dv)), lwd=.9)
# Add lines to denote events
sepPlotBaseSpatial = sepPlotBaseSpatial + geom_linerange(aes(color=factor(dv), x=1:length(dv)), alpha=.9)
# Color event lines
sepPlotBaseSpatiall = sepPlotBaseSpatial + scale_color_manual(values = col)
sepPlotBaseSpatial= sepPlotBaseSpatial + scale_y_continuous("", breaks = c(0, 0.25, 0.5, 0.75, 1.0))
sepPlotBaseSpatial = sepPlotBaseSpatial + scale_x_continuous("", breaks = NULL)
sepPlotBaseSpatial = sepPlotBaseSpatial + theme(
  axis.ticks = element_blank(),
  legend.position = "none", 
  panel.background = element_blank(), 
  panel.grid = element_blank() )

sepPlotBaseSpatial = sepPlotBaseSpatial + ggtitle("Accounting for Spatial Effects")
sepPlotBaseSpatial



#separation plot, k fold stuff for Spatial Analysis
modelsSpatial = list(list())
#colors = c("pink","red","orange","yellow","green","dark green","light blue","blue","purple")
#spacing = c(.1, .2, .3, .4, .5, .6)
#names = c("Intercept", "Viol*Rel INC Rep", "Violence",
          "Rel. INC rep", "Dem. polarization", 
          "Absolute level INC rep", "Enclave pop.", "Hindu pop.", 
          "Dist. to Capital")
numbers = seq(1:6)
#coefplot(logitmodNIcl[,1], logitmodNIcl[,1],  varnames=names, xlim=c(-65,45),
#         main = "Coefficient Estimates, No Interaction")
AICspatial = list()
BICspatial = list()
sepPlotspatial=list()

for(ii in 1:k){
  
  train = indiadata[indiadata$rand !=ii,]  
  test=indiadata[indiadata$rand==ii,] 
  modSpatial = glm(spatialLogitForm, data=train, family=binomial(link="logit"))
  
  
  modelsSpatial[[ii]]= cl(train, modSpatial, train$state)
  model=cl(train, modSpatial, train$state)
  
  #coefplot(model[,1], model[,1], intercept = TRUE, xlim=c(-35,35), col.pts = colors[ii], main = "Coefficient Estimates, Colored by Fold", add=TRUE, offset=spacing[ii])
  
  #generating predicted values
  trainBeta = coef(modSpatial)
  X = data.matrix( cbind(1, train[, names(trainBeta)[2:length(trainBeta)] ] ) )
  predVals = X %*% trainBeta
  predProbs = 1/( 1 + exp(-predVals) )
  
  # Running some perf statistics (logLikelihood, AIC, BIC)
  #logLike = sum(test$statehood * log(predProbs) + (1 - test$statehood)*log(1-predProbs))
  
  # Using this we can calculate the AIC and BIC
  # AIC: -2 * log-likelihood + 2 * npar
  #AICspatial[[ii]] = -2 * logLike + 2 * length(coef(modSpatial)) # AIC(m2)
  # BIC: -2 * log-likelihood + log(nrow(data)) * npar
  #BICspatial[[ii]] = -2 * logLike + log(nrow(test)) * length(coef(modSpatial))
  
  #setting up to plot
  
  sepDataspatial = data.frame(prob = predProbs, dv = train$statehood)
  sepDataspatial = sepDataspatial[order(sepDataspatial$prob), ]
  col = c("grey94", "midnightblue") 
  
  #graphing
  sepPlotspatial[[ii]] = ggplot(data=sepDataspatial, aes(ymin=0, ymax=1, xmin=0, xmax=1:length(dv)))
  # Create a rectangular plot to fill
  sepPlotspatial[[ii]] = sepPlotspatial[[ii]] + geom_rect(fill = "grey94")
  # Add predicted probability line
  sepPlotspatial[[ii]] = sepPlotspatial[[ii]] + geom_line(aes(y=prob, x=1:length(dv)), lwd=.8)
  # Add lines to denote events
  sepPlotspatial[[ii]] = sepPlotspatial[[ii]] + geom_linerange(aes(color=factor(dv), x=1:length(dv)), alpha=.8)
  # Color event lines
  sepPlotspatial[[ii]] = sepPlotspatial[[ii]] + scale_color_manual(values = col)
  sepPlotspatial[[ii]] = sepPlotspatial[[ii]] + scale_y_continuous("", breaks = c(0, 0.25, 0.5, 0.75, 1.0))
  sepPlotspatial[[ii]] = sepPlotspatial[[ii]] + scale_x_continuous("", breaks = NULL)
  sepPlotspatial[[ii]] = sepPlotspatial[[ii]] + theme(
    axis.ticks = element_blank(),
    legend.position = "none", 
    panel.background = element_blank(), 
    panel.grid = element_blank() )
  title =  paste0("Training Set with Spatial Variable: Fold ", numbers[ii])
  sepPlotspatial[[ii]] = sepPlotspatial[[ii]] + ggtitle(title)
  sepPlotspatial[[ii]]
}

combinedSepPlotsspatial2 = multiplot(sepPlotspatial[[1]], sepPlotspatial[[2]], sepPlotspatial[[3]], sepPlotspatial[[4]], sepPlotspatial[[5]], sepPlotspatial[[6]], cols=2)

############################################################
#now redoing spatial analysis but w/o interaction term
spatialivs2 = c('spatial', 'violence', 'LNRelINCRep', 'DemPol', 
               'lnenclINCrep', 'LNenclpop', 'Hindu', 'lnkmCAP')
spatialLogitForm2 = formula(paste0( dv, ' ~ ', paste(spatialivs2, collapse=' + ') ))
spatialModel2 = glm(spatialLogitForm2, data=indiadata, family=binomial(link="logit"))
summary(spatialModel2)

cl(indiadata, spatialModel2, indiadata$state)
lrtest(spatialModel2, mlogit1)

modelsSpatial = list(list())
#colors = c("pink","red","orange","yellow","green","dark green","light blue","blue","purple")
#spacing = c(.1, .2, .3, .4, .5, .6)
#names = c("Intercept", "Viol*Rel INC Rep", "Violence",
"Rel. INC rep", "Dem. polarization", 
"Absolute level INC rep", "Enclave pop.", "Hindu pop.", 
"Dist. to Capital")
numbers = seq(1:6)
#coefplot(logitmodNIcl[,1], logitmodNIcl[,1],  varnames=names, xlim=c(-65,45),
#         main = "Coefficient Estimates, No Interaction")
AICspatial = list()
BICspatial = list()
sepPlotspatial=list()

for(ii in 1:k){
  
  train = indiadata[indiadata$rand !=ii,]  
  test=indiadata[indiadata$rand==ii,] 
  modSpatial = glm(spatialLogitForm2, data=train, family=binomial(link="logit"))
  
  
  modelsSpatial[[ii]]= cl(train, modSpatial, train$state)
  model=cl(train, modSpatial, train$state)
  
  #coefplot(model[,1], model[,1], intercept = TRUE, xlim=c(-35,35), col.pts = colors[ii], main = "Coefficient Estimates, Colored by Fold", add=TRUE, offset=spacing[ii])
  
  #generating predicted values
  trainBeta = coef(modSpatial)
  X = data.matrix( cbind(1, train[, names(trainBeta)[2:length(trainBeta)] ] ) )
  predVals = X %*% trainBeta
  predProbs = 1/( 1 + exp(-predVals) )
  
  # Running some perf statistics (logLikelihood, AIC, BIC)
  #logLike = sum(test$statehood * log(predProbs) + (1 - test$statehood)*log(1-predProbs))
  
  # Using this we can calculate the AIC and BIC
  # AIC: -2 * log-likelihood + 2 * npar
  #AICspatial[[ii]] = -2 * logLike + 2 * length(coef(modSpatial)) # AIC(m2)
  # BIC: -2 * log-likelihood + log(nrow(data)) * npar
  #BICspatial[[ii]] = -2 * logLike + log(nrow(test)) * length(coef(modSpatial))
  
  #setting up to plot
  
  sepDataspatial = data.frame(prob = predProbs, dv = train$statehood)
  sepDataspatial = sepDataspatial[order(sepDataspatial$prob), ]
  col = c("grey94", "midnightblue") 
  
  #graphing
  sepPlotspatial[[ii]] = ggplot(data=sepDataspatial, aes(ymin=0, ymax=1, xmin=0, xmax=1:length(dv)))
  # Create a rectangular plot to fill
  sepPlotspatial[[ii]] = sepPlotspatial[[ii]] + geom_rect(fill = "grey94")
  # Add predicted probability line
  sepPlotspatial[[ii]] = sepPlotspatial[[ii]] + geom_line(aes(y=prob, x=1:length(dv)), lwd=.8)
  # Add lines to denote events
  sepPlotspatial[[ii]] = sepPlotspatial[[ii]] + geom_linerange(aes(color=factor(dv), x=1:length(dv)), alpha=.8)
  # Color event lines
  sepPlotspatial[[ii]] = sepPlotspatial[[ii]] + scale_color_manual(values = col)
  sepPlotspatial[[ii]] = sepPlotspatial[[ii]] + scale_y_continuous("", breaks = c(0, 0.25, 0.5, 0.75, 1.0))
  sepPlotspatial[[ii]] = sepPlotspatial[[ii]] + scale_x_continuous("", breaks = NULL)
  sepPlotspatial[[ii]] = sepPlotspatial[[ii]] + theme(
    axis.ticks = element_blank(),
    legend.position = "none", 
    panel.background = element_blank(), 
    panel.grid = element_blank() )
  title =  paste0("Training Set with Spatial Variable: Fold ", numbers[ii])
  sepPlotspatial[[ii]] = sepPlotspatial[[ii]] + ggtitle(title)
  sepPlotspatial[[ii]]
}

combinedSepPlotsspatial2 = multiplot(sepPlotspatial[[1]], sepPlotspatial[[2]], sepPlotspatial[[3]], sepPlotspatial[[4]], sepPlotspatial[[5]], sepPlotspatial[[6]], cols=2)
